# ==============================================================================
# name: RQ2-sociodemographics.R
# date:	Jan 25, 2022
# author: Bernhard Clemm / Tiago Ventura
# purpose: compare professionals and non-professionals on sociodemographics and political outcomes
# ==============================================================================

rm(list = ls())

source("code/utils/functions.R")

# Names of the survey outcomes compared across groups. Used further down with
# any_of(), so each sample keeps whichever of these columns it contains.
outcomes <- c(
  # sociodemographics
  "age",
  "female",
  "edu_high",
  "white",
  # partisanship, ideology, affect
  "party",
  "ideo",
  "ft_outparty",
  # political engagement
  "int_politics",
  "foll_politics",
  "polknow"
)

# POPULATION DATA ==============================================================

## Census ####

# Builds `summary_census`: one row per sociodemographic benchmark (median age,
# % female, % with a Bachelor's degree or higher, % white), rounded to one
# decimal, from three census spreadsheet exports.

# Convert a census count written with thousands separators (e.g. "1,234")
# into a number.
parse_census_count <- function(x) {
  as.numeric(gsub(",", "", x))
}

### Age and sex (ACS 5-year, table S0101) ----

census_age_sex <- read.xlsx(
  "data/surveys_raw/population/census/ACSST5Y2020.S0101-2022-07-06T081635.xlsx",
  sheet = 2
)

# The first column holds the row labels; give it a stable name to filter on.
names(census_age_sex)[1] <- "variable"

census_total <- census_age_sex %>%
  filter(variable == "Total population") %>%
  pull(United.States) %>%
  parse_census_count()

census_age_median <- census_age_sex %>%
  filter(variable == "Median age (years)") %>%
  pull(United.States) %>%
  as.numeric()

# NOTE(review): X10 is an auto-generated column name; it is used here as the
# female population count -- confirm against the spreadsheet layout.
census_female <- census_age_sex %>%
  filter(variable == "Total population") %>%
  pull(X10) %>%
  parse_census_count()

census_female_perc <- census_female / census_total * 100

### Race (Decennial census, table P1) ----

census_race <- read.xlsx(
  "data/surveys_raw/population/census/DECENNIALPL2020.P1-2022-07-06T074603.xlsx",
  sheet = 2
)

census_white <- census_race %>%
  filter(Label == "White alone") %>%
  pull(United.States) %>%
  parse_census_count()

# The share of white residents must use the total from the same table as the
# numerator. Dividing the decennial count by the ACS total (`census_total`)
# mixes two different population bases.
# NOTE(review): the total row is assumed to be labelled "Total:" -- confirm.
census_race_total <- census_race %>%
  filter(Label %in% c("Total:", "Total")) %>%
  pull(United.States) %>%
  parse_census_count()

if (length(census_race_total) != 1 || is.na(census_race_total)) {
  warning(
    "Could not find a unique total row in the race table; ",
    "falling back to the ACS total population as denominator.",
    call. = FALSE
  )
  census_race_total <- census_total
}

census_white_perc <- census_white / census_race_total * 100

### Education (ACS 5-year, table S1501) ----

census_education <- read.xlsx(
  "data/surveys_raw/population/census/ACSST5Y2020.S1501-2022-07-06T083022.xlsx",
  sheet = 2
)

names(census_education)[1] <- "variable"

# Keep only the first 18 rows so that the label filters below do not pick up
# rows with the same labels further down the sheet.
census_education <- census_education[1:18, ]

# Adult population = the two age groups reported in the table.
census_total_over18 <- census_education %>%
  filter(variable %in% c(
    "Population 18 to 24 years",
    "Population 25 years and over"
  )) %>%
  pull(United.States) %>%
  parse_census_count() %>%
  sum()

# Sum over every remaining row carrying this label.
census_eduhigh <- census_education %>%
  filter(variable %in% c("Bachelor's degree or higher")) %>%
  pull(United.States) %>%
  parse_census_count() %>%
  sum()
census_eduhigh_perc <- census_eduhigh / census_total_over18 * 100

### Collect ----

summary_census <- data.frame(
  value = c(
    census_age_median, census_female_perc,
    census_eduhigh_perc, census_white_perc
  ),
  variable = c("age", "female", "edu_high", "white")
) %>%
  mutate(value = round(value, 1))

# Drop the temporaries introduced in this section.
rm(parse_census_count, census_race_total)

## ANES 2020 ####

# Load the ANES 2020 time series and derive the political outcomes used as
# population benchmarks: party (1-7), ideo (0-1), int_politics (0-1),
# ft_outparty and polknow (0-1). Only these and the survey-design variables
# are kept.
anes_2020 <- read_sav(
  "data/surveys_raw/population/anes_2020/anes_timeseries_2020_spss_20220210.sav"
)

anes_2020 <- anes_2020 %>%
  rename(
    "party_repmoddem" = V201228,
    "party_repdem" = V201229,
    "party_mod" = V201230,
    "ideo" = V201200,
    "ft_dem_party" = V201156,
    "ft_rep_party" = V201157,
    # "trust_gov" = V201233,
    "int_politics" = V202406,
    "weight_pre" = V200010a,
    "psu" = V200010c,
    "stratum" = V200010d,
    "weight_post" = V200010b
  ) %>%
  # Create a 1-7 party measure from the three raw party items. This runs on
  # the raw codes (before any NA recoding), so the negative codes listed below
  # are matched on purpose. Rows matching no condition become NA.
  # NOTE(review): the meaning of each raw code comes from the ANES codebook,
  # not from this file -- confirm the mapping there.
  mutate(
    party = case_when(
      party_repmoddem == 1 & party_repdem == 1 ~ 1,
      party_repmoddem == 1 & party_repdem == 2 ~ 2,
      party_repmoddem %in% c(0, 3, 5, -8) & party_mod == 3 ~ 3,
      party_repmoddem %in% c(0, 3, 5, -8) & party_mod %in% c(-8, -9, 2) ~ 4,
      party_repmoddem %in% c(0, 3, 5, -8) & party_mod == 1 ~ 5,
      party_repmoddem == 2 & party_repdem == 2 ~ 6,
      party_repmoddem == 2 & party_repdem == 1 ~ 7
    )
  ) %>%
  # Recode all special values to NA
  mutate(
    across(c(
      ideo, # trust_gov,
      int_politics
    ), ~ ifelse(. %in% c(-9:-1, 99), NA, .)),
    across(c(ft_dem_party, ft_rep_party), ~ ifelse(. %in% c(-9:-4, 998), NA, .))
  ) %>%
  # Recode trust, political interest and ideology to a 0-1 scale
  mutate(
    # trust_gov = (trust_gov - 1) / (5 - 1),
    int_politics = (int_politics - 1) / (4 - 1),
    ideo = (ideo - 1) / (7 - 1)
  ) %>%
  # Create feeling thermometer for out-party: the thermometer of the other
  # party for respondents coded 1 or 2 on the first party item; NA otherwise.
  mutate(
    ft_outparty = case_when(
      party_repmoddem == 1 ~ ft_rep_party,
      party_repmoddem == 2 ~ ft_dem_party
    )
  ) %>%
  # Create political knowledge index with the 3 items we have in the survey.
  # Any answer other than the keyed one (including special codes) scores 0.
  mutate(
    polknow_senator = ifelse(V201644 == 6, 1, 0),
    polknow_spend = ifelse(V201645 == 1, 1, 0),
    polknow_majority = ifelse(V201646 == 1, 1, 0)
  ) %>%
  # Index = share of the three items scored 1
  mutate(
    polknow = rowMeans(select(., c(
      "polknow_senator", "polknow_spend", "polknow_majority"
    )), na.rm = TRUE)
  ) %>%
  select(
    party, ideo, int_politics, polknow, ft_outparty, #  trust_gov,
    weight_pre, psu, stratum, weight_post
  )

# Create survey design (cf. p.5 of ANES codebook)
## Since political interest measured post, separate design needed
anes_2020_design_pre <- svydesign(
  id = ~psu, strata = ~stratum, weights = ~weight_pre,
  data = anes_2020, nest = TRUE
)

# Respondents without a post-election weight cannot enter this design.
anes_2020_design_post <- svydesign(
  id = ~psu, strata = ~stratum, weights = ~weight_post,
  data = anes_2020 %>% filter(!is.na(weight_post)), nest = TRUE
)

# Weighted mean and standard error of ONE variable, as a one-row data frame.
#
# Each variable is estimated on its own because svymean() with `na.rm = TRUE`
# drops every respondent who is missing on ANY variable in the formula.
# Estimating party, ideo and polknow together with ft_outparty (which is only
# defined for respondents coded 1 or 2 on the first party item) would restrict
# all of those means to that subset.
anes_svymean <- function(var, design) {
  est <- svymean(reformulate(var), design, na.rm = TRUE)
  data.frame(
    variable = var,
    mean = as.numeric(coef(est)),
    SE = as.numeric(SE(est))
  )
}

# `summary_anes`: one row per outcome, `value` formatted as "mean (SE)".
summary_anes <- rbind(
  do.call(
    rbind,
    lapply(
      c("party", "ideo", "ft_outparty", "polknow"),
      anes_svymean,
      design = anes_2020_design_pre
    )
  ),
  anes_svymean("int_politics", anes_2020_design_post)
) %>%
  mutate(
    SE = round(SE, 3),
    # The thermometer gets one decimal; all other outcomes get two.
    mean = ifelse(variable == "ft_outparty",
      round(mean, 1), round(mean, 2)
    )
  ) %>%
  mutate(value = paste0(mean, " (", SE, ")")) %>%
  select(variable, value)

# Drop the temporary helper introduced in this section.
rm(anes_svymean)

## Combine ####

# Stack the census and ANES benchmarks into a single population table and
# call its value column `population`.
summary_pop <- summary_census %>%
  rbind(summary_anes) %>%
  rename(population = "value")

# Everything used to build the benchmarks is no longer needed.
rm(list = c(
  # ANES objects
  "anes_2020", "anes_2020_design_post", "anes_2020_design_pre",
  # raw census sheets
  "census_age_sex", "census_education", "census_race",
  # census intermediates
  "census_age_median", "census_eduhigh", "census_eduhigh_perc",
  "census_female", "census_female_perc", "census_total",
  "census_total_over18", "census_white", "census_white_perc",
  # per-source summaries
  "summary_anes", "summary_census"
))

# READ IN, FILTER AND BIND DATA ================================================

# Seven-day filter:
## "Across samples, some donating subjects participated in the surveys but provided little
## browsing data. As such subjects would distort some of the proportional metrics we calculate—
## for example, someone who submitted five visits in total, all of which are to a survey site, would
## be treated as doing surveys 100 percent of the time—we exclude subjects who submitted data
## from less than seven days."

# Raw analysis files, one per sample.
fb <- read.csv("data/analysis_FB.csv")
lu <- read.csv("data/analysis_LU.csv")
yg <- read.csv("data/analysis_YG.csv")

# Facebook: all survey outcomes are in wave 1, so that wave is enough.
# Missing weights are set to 1.
profs_fb <- fb %>%
  filter(n_days_active >= 7, wave == 1) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  ) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    any_of(outcomes)
  )

# Lucid: some outcomes are only measured in W2/W3, so each wave gets its own
# data frame. Wave 1 drops the two outcomes taken from the later waves.
profs_lu_w1 <- lu %>%
  filter(n_days_active >= 7, wave == 1) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  ) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    any_of(outcomes)
  ) %>%
  select(-int_politics, -polknow)

# Lucid wave 2: political interest only.
profs_lu_w2 <- lu %>%
  filter(n_days_active >= 7, wave == 2) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  ) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    int_politics
  )

# Lucid wave 3: political knowledge only.
profs_lu_w3 <- lu %>%
  filter(n_days_active >= 7, wave == 3) %>%
  mutate(
    person_id = as.character(person_id),
    weight = ifelse(is.na(weight), 1, weight)
  ) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    polknow
  )

# YouGov: no wave filter and weights are used as delivered.
profs_yg <- yg %>%
  filter(n_days_active >= 7) %>%
  mutate(person_id = as.character(person_id)) %>%
  select(
    dataset, person_id, weight,
    starts_with("professional_"),
    any_of(outcomes)
  )

# TABLE HELPERS ================================================================

# Table 1 and Tables D.4-D.6 are the same table built with a different
# professionalism indicator, so the construction lives in one function.

# Remove the " (...)" part from the age row of a make_table() result
# (we do not report SD for median).
strip_age_parenthetical <- function(dt) {
  dt %>%
    mutate(
      pro = ifelse(variable == "age", gsub(" \\(.+\\)", "", pro), pro),
      nonpro = ifelse(variable == "age", gsub(" \\(.+\\)", "", nonpro), nonpro)
    )
}

# Age is a group indicator for the FB data, so we label it with the age
# bracket. Any `pro`/`nonpro` cell that is exactly "4".."7" is relabelled.
label_fb_age_brackets <- function(dt) {
  dt %>%
    mutate(across(c(pro, nonpro), ~ case_when(
      . == "4" ~ "30-34",
      . == "5" ~ "35-39",
      . == "6" ~ "40-44",
      . == "7" ~ "45-49",
      .default = .
    )))
}

# Drop the p-value column and prefix the remaining result columns with the
# sample name (e.g. `pro` -> `lucid_pro`), leaving `variable` as the join key.
prefix_sample_columns <- function(dt, prefix) {
  dt %>%
    select(-p) %>%
    rename_with(~ paste0(prefix, "_", .), c(pro, nonpro, sig))
}

# Build and export one "professionals vs. non-professionals vs. population"
# table.
#
# var_prof    name of the professionalism indicator column passed to
#             make_table()
# caption     table caption
# file        output path handed to export_kable_sociodem()
# samples     list with elements fb, lu_w1, lu_w2, lu_w3, yg (filtered samples)
# population  benchmark table with columns `variable` and `population`
# format      "html" (default) or "latex"; used for both make_table() and
#             export_kable_sociodem()
#
# Returns the combined table invisibly. The per-sample intermediate tables
# stay local to the function.
export_sociodem_comparison <- function(var_prof, caption, file,
                                       samples, population,
                                       format = "html") {
  ### Facebook ####
  summary_fb <- make_table(
    dt = samples$fb,
    var_prof = var_prof,
    outcomes_prop = c("female", "edu_high", "white"),
    outcomes_median = c("age"),
    outcomes_mean_raw = c("party", "ft_outparty"),
    outcomes_mean_01 = c("ideo", "int_politics", "foll_politics"),
    order = c(
      "age", "female", "edu_high", "white", "party",
      "ideo", "ft_outparty", "int_politics", "foll_politics"
    ),
    weight_var = "weight",
    output = format
  ) %>%
    strip_age_parenthetical() %>%
    label_fb_age_brackets()

  ### Lucid ####
  # Outcomes come from three waves, so three tables are built and stacked.
  summary_lu_w1 <- make_table(
    dt = samples$lu_w1,
    var_prof = var_prof,
    outcomes_prop = c("female", "edu_high", "white"),
    outcomes_median = c("age"),
    outcomes_mean_raw = c("party", "ft_outparty"),
    outcomes_mean_01 = c("ideo"),
    order = c(
      "age", "female", "edu_high", "white", "party",
      "ideo", "ft_outparty"
    ),
    weight_var = "weight",
    output = format
  )

  summary_lu_w2 <- make_table(
    dt = samples$lu_w2,
    var_prof = var_prof,
    outcomes_prop = NULL,
    outcomes_median = NULL,
    outcomes_mean_raw = NULL,
    outcomes_mean_01 = c("int_politics"),
    order = c("int_politics"),
    weight_var = "weight",
    output = format
  )

  summary_lu_w3 <- make_table(
    dt = samples$lu_w3,
    var_prof = var_prof,
    outcomes_prop = NULL,
    outcomes_median = NULL,
    outcomes_mean_raw = NULL,
    outcomes_mean_01 = c("polknow"),
    order = c("polknow"),
    weight_var = "weight",
    output = format
  )

  summary_lu <- rbind(summary_lu_w1, summary_lu_w2, summary_lu_w3) %>%
    strip_age_parenthetical()

  ### YouGov ####
  summary_yg <- make_table(
    dt = samples$yg,
    var_prof = var_prof,
    outcomes_prop = c("female", "edu_high", "white"),
    outcomes_median = c("age"),
    outcomes_mean_raw = c("party", "ft_outparty"),
    outcomes_mean_01 = c("ideo", "int_politics", "foll_politics", "polknow"),
    order = c(
      "age", "female", "edu_high", "white", "party",
      "ideo", "ft_outparty", "int_politics", "foll_politics", "polknow"
    ),
    weight_var = "weight",
    output = format
  ) %>%
    strip_age_parenthetical()

  ### Combine ####
  # Join order (Lucid, YouGov, Facebook, population) fixes the row order.
  # The key is named explicitly rather than guessed from shared columns.
  summary_all <- prefix_sample_columns(summary_lu, "lucid") %>%
    full_join(prefix_sample_columns(summary_yg, "yg"), by = "variable") %>%
    full_join(prefix_sample_columns(summary_fb, "fb"), by = "variable") %>%
    full_join(population, by = "variable") %>%
    mutate(varname = case_when(
      variable == "age" ~ "Age (median years) ",
      variable == "female" ~ "Gender (\\% female)",
      variable == "edu_high" ~ "Education (\\% Bachelor or more)",
      variable == "white" ~ "Ethnicity (\\% white)",
      variable == "ideo" ~ "Ideology (0-1)",
      variable == "party" ~ "Partisanship (1-7)",
      variable == "ft_outparty" ~ "Thermometer out-party (1-100)",
      variable == "int_politics" ~ "Political interest (0-1)",
      variable == "polknow" ~ "Political knowledge (0-1)",
      variable == "foll_politics" ~ "Following politics (0-1)"
    )) %>%
    # Outcomes a sample does not have are shown as empty cells.
    mutate(across(everything(), ~ ifelse(is.na(.), "", .))) %>%
    select(
      varname, population,
      fb_pro, fb_sig, fb_nonpro,
      lucid_pro, lucid_sig, lucid_nonpro,
      yg_pro, yg_sig, yg_nonpro
    )

  export_kable_sociodem(
    dt = summary_all,
    caption = caption,
    format = format,
    file = file
  )

  invisible(summary_all)
}

# Filtered samples shared by all four tables.
sociodem_samples <- list(
  fb = profs_fb,
  lu_w1 = profs_lu_w1,
  lu_w2 = profs_lu_w2,
  lu_w3 = profs_lu_w3,
  yg = profs_yg
)

# For latex output, add `format = "latex"` to the calls below.

# MAIN PAPER ===================================================================

## Table 1: Survey professionals vs. non-professionals vs. population ####

summary_all <- export_sociodem_comparison(
  var_prof = "professional_1",
  caption = "Survey professionals vs. non-professionals vs. population (professionals = more than 100 survey visits / day)",
  file = "output/tab1_rq2_comparison_sociodems.html",
  samples = sociodem_samples,
  population = summary_pop
)

# SM D.1 Alternative professionalism indicators ================================

## Table D.4: Survey professionals vs. non-professionals vs. population (measure 2) ####

summary_all <- export_sociodem_comparison(
  var_prof = "professional_2",
  caption = "Survey professionals vs. non-professionals vs. population (professional = more than 50 percent of visits to survey sites)",
  file = "output/tabD4_rq2_comparison_sociodems_2.html",
  samples = sociodem_samples,
  population = summary_pop
)

## Table D.5: Survey professionals vs. non-professionals vs. population (measure 3) ####

summary_all <- export_sociodem_comparison(
  var_prof = "professional_3",
  caption = "Survey professionals vs. non-professionals vs. population (professional = more than 50 percent of browsing time to survey sites)",
  file = "output/tabD5_rq2_comparison_sociodems_3.html",
  samples = sociodem_samples,
  population = summary_pop
)

## Table D.6: Survey professionals vs. non-professionals vs. population (measure any) ####

summary_all <- export_sociodem_comparison(
  var_prof = "professional_all",
  caption = "Survey professionals vs. non-professionals vs. population (professional = any of the categories)",
  file = "output/tabD6_rq2_comparison_sociodems_any.html",
  samples = sociodem_samples,
  population = summary_pop
)
